# Read the combined stats / shot data CSV into df.
df <- read_csv(file = "./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove games_started and pf_per_g, then list the remaining column names.
df <- select(df, !c("games_started", "pf_per_g"))
names(df)
##  [1] "player"         "dunk_tot"       "dunk_pct"       "rim_tot"       
##  [5] "rim_pct"        "rim_asted"      "other2pt_tot"   "other2pt_pct"  
##  [9] "other2pt_asted" "3pt_tot"        "3pt_pct"        "3pt_asted"     
## [13] "games"          "mp_per_g"       "fg_per_g"       "fga_per_g"     
## [17] "fg_pct"         "fg2_per_g"      "fg2a_per_g"     "fg2_pct"       
## [21] "fg3_per_g"      "fg3a_per_g"     "fg3_pct"        "ft_per_g"      
## [25] "fta_per_g"      "ft_pct"         "orb_per_g"      "drb_per_g"     
## [29] "trb_per_g"      "ast_per_g"      "stl_per_g"      "blk_per_g"     
## [33] "tov_per_g"      "pts_per_g"

Plot 2-point attempts against 3-point attempts

From success script

# Location of the draft-data CSV, which is read into df_career_stats.
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- path |> read_csv()
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Columns to discard from the career-stats table.
drop_cols <- c("team_id", "skip", "mp", "pts", "trb", "ast")

# all_of() marks drop_cols as an external character vector; passing the bare
# vector to select() is deprecated in tidyselect and emits a warning.
df_career_stats <- df_career_stats |> select(!all_of(drop_cols))
# Keep only the picks whose overall draft position is below 15.
df_lot_picks <- filter(df_career_stats, pick_overall < 15)

names(df_career_stats)
##  [1] "pick_overall" "player"       "college_name" "seasons"      "g"           
##  [6] "fg_pct"       "fg3_pct"      "ft_pct"       "mp_per_g"     "pts_per_g"   
## [11] "trb_per_g"    "ast_per_g"    "ws"           "ws_per_48"    "bpm"         
## [16] "vorp"         "year"
# Add points + rebounds + assists (PRA) per game and turn the pick number
# into a factor so each draft slot is treated as its own category.
df_lot_picks <- mutate(
  df_lot_picks,
  pra_per_g = pts_per_g + trb_per_g + ast_per_g,
  pick_overall = factor(pick_overall)
)

# Mean per-game minutes, points, rebounds, assists and PRA for each draft slot.
draft_means <- df_lot_picks |>
  group_by(pick_overall) |>
  summarise(
    avg_mpg = mean(mp_per_g),
    avg_ppg = mean(pts_per_g),
    avg_trbpg = mean(trb_per_g),
    avg_apg = mean(ast_per_g),
    avg_prapg = mean(pra_per_g)
  )

print(draft_means, n = 14)
## # A tibble: 14 × 6
##    pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
##    <fct>          <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
##  1 1               31.2   18.8       6.52    4.32      29.7
##  2 2               26.9   13.7       4.96    2.88      21.6
##  3 3               30.0   17.6       6.3     3.52      27.4
##  4 4               27.5   12.8       5.39    2.48      20.7
##  5 5               26.0   12.7       4.92    3.55      21.2
##  6 6               23.2   10.2       4.35    2.26      16.8
##  7 7               27.6   13.4       4.88    2.77      21.0
##  8 8               21.1    8.82      3.37    1.77      14.0
##  9 9               24.5   10.9       4.55    2.52      18.0
## 10 10              23.5   10.4       3.61    2.31      16.3
## 11 11              21.1   10.2       3.69    2.16      16.0
## 12 12              24.6   10.6       4.47    2.23      17.3
## 13 13              22.6   10.9       3.99    2.23      17.1
## 14 14              20.3    8.78      3.75    1.3       13.8
# Box plot of PRA per game for each draft slot.
ggplot(df_lot_picks, aes(x = pick_overall, y = pra_per_g)) +
  geom_boxplot() +
  labs(
    x = "Draft Pick",
    y = "Points-Rebounds-Assists Per Game"
  )

Plotting the players in the top 25% in PRA per game

# Attach the lottery-pick NBA data to each row of df (matched on player name),
# split the "made-attempts" strings for dunks, rim shots and other two-pointers
# into separate columns, and convert those new columns to numeric.
df2 <- df |>
  left_join(df_lot_picks, by = "player", suffix = c("_college", "_nba")) |>
  separate_wider_delim(
    dunk_tot,
    delim = "-",
    names = c("dunk_made", "dunk_attempts")
  ) |>
  separate_wider_delim(
    rim_tot,
    delim = "-",
    names = c("rim_made", "rim_attempts")
  ) |>
  separate_wider_delim(
    other2pt_tot,
    delim = "-",
    names = c("other2pt_made", "other2pt_attempts")
  ) |>
  mutate(
    across(
      c(
        dunk_made, dunk_attempts,
        rim_made, rim_attempts,
        other2pt_made, other2pt_attempts
      ),
      as.numeric
    )
  )

Defining the bust metric

# VORP divided by games played.
df2 <- mutate(df2, vorp_per_g = vorp / g)

# 70th-percentile value of each NBA production metric within every draft slot.
df_top_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.7)
    )
  )

# 30th-percentile value of the same metrics within every draft slot.
df_bottom_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.3)
    )
  )

# The metric favors big men,
# so use a higher percentile for the rebounding threshold.

# Return the players drafted at `pick_number` who are NOT considered busts.
#
# A player is kept only when all three conditions hold:
#   1. at least one of PRA, assists, rebounds or points per game (NBA) reaches
#      the slot's threshold (rebounds use the slot's 80th percentile; the other
#      three thresholds are read from `df_top_players`),
#   2. VORP per game is at least the slot's median, and
#   3. `seasons` is at least 4/5 of the years elapsed since `year`.
#
# Arguments:
#   pick_number     Draft slot to evaluate; compared against `pick_overall`.
#   df_top_players  Table with one row per `pick_overall` holding the
#                   thresholds `pts_per_g_nba`, `ast_per_g_nba`, `pra_per_g`.
#   df2             Player-level table with `pick_overall`, `pra_per_g`,
#                   `ast_per_g_nba`, `trb_per_g_nba`, `pts_per_g_nba`,
#                   `vorp_per_g`, `seasons` and `year`.
#   current_year    Year used to measure time since the draft (default 2023).
#
# Returns the rows of `df2` for that slot that satisfy every condition.
#
# The former defaults (`df_top_players = df_top_players`, `df2 = df2`) referred
# to themselves and failed with "promise already under evaluation" whenever
# they were relied upon, so both tables are now required arguments.
is_not_bust <- function(pick_number, df_top_players, df2, current_year = 2023) {
  # Match the threshold row on pick_overall instead of indexing by position,
  # so a slot missing from the summary table cannot shift the lookup.
  cutoffs <- df_top_players |> filter(pick_overall == pick_number)
  if (nrow(cutoffs) != 1) {
    stop(
      "df_top_players must contain exactly one row for pick ", pick_number,
      call. = FALSE
    )
  }
  pts_cut <- cutoffs[["pts_per_g_nba"]]
  ast_cut <- cutoffs[["ast_per_g_nba"]]
  pra_cut <- cutoffs[["pra_per_g"]]

  slot_players <- df2 |> filter(pick_overall == pick_number)

  # Rebound and VORP thresholds are derived from this slot's players only;
  # missing values are ignored rather than aborting the computation.
  trb_cut <- quantile(
    slot_players[["trb_per_g_nba"]],
    probs = 0.8,
    na.rm = TRUE,
    names = FALSE
  )
  vorp_cut <- median(slot_players[["vorp_per_g"]], na.rm = TRUE)

  slot_players |>
    filter(
      pra_per_g >= pra_cut |
        ast_per_g_nba >= ast_cut |
        trb_per_g_nba >= trb_cut |
        pts_per_g_nba >= pts_cut
    ) |>
    filter(vorp_per_g >= vorp_cut) |>
    # must also have played most of their career in the NBA
    filter(seasons >= 4 / 5 * (current_year - year))
}

# Return the players drafted at `pick_number` who are considered busts.
#
# A player is flagged when either condition holds:
#   1. PRA, assists, rebounds, points AND VORP per game are all below the
#      slot's thresholds (rebounds use the slot's 40th percentile, VORP the
#      slot's 30th percentile; the other three are read from
#      `df_bottom_players`), or
#   2. `seasons` is less than half of the years elapsed since `year`.
#
# Arguments:
#   pick_number        Draft slot to evaluate; compared against `pick_overall`.
#   df_bottom_players  Table with one row per `pick_overall` holding the
#                      thresholds `pts_per_g_nba`, `ast_per_g_nba`, `pra_per_g`.
#   df2                Player-level table with `pick_overall`, `pra_per_g`,
#                      `ast_per_g_nba`, `trb_per_g_nba`, `pts_per_g_nba`,
#                      `vorp_per_g`, `seasons` and `year`.
#   current_year       Year used to measure time since the draft (default 2023).
#
# Returns the rows of `df2` for that slot that meet either condition.
#
# The former defaults (`df_bottom_players = df_bottom_players`, `df2 = df2`)
# referred to themselves and failed with "promise already under evaluation"
# whenever they were relied upon, so both tables are now required arguments.
is_bust <- function(pick_number, df_bottom_players, df2, current_year = 2023) {
  # Match the threshold row on pick_overall instead of indexing by position,
  # so a slot missing from the summary table cannot shift the lookup.
  cutoffs <- df_bottom_players |> filter(pick_overall == pick_number)
  if (nrow(cutoffs) != 1) {
    stop(
      "df_bottom_players must contain exactly one row for pick ", pick_number,
      call. = FALSE
    )
  }
  pts_cut <- cutoffs[["pts_per_g_nba"]]
  ast_cut <- cutoffs[["ast_per_g_nba"]]
  pra_cut <- cutoffs[["pra_per_g"]]

  slot_players <- df2 |> filter(pick_overall == pick_number)

  # Rebound and VORP thresholds are derived from this slot's players only;
  # missing values are ignored rather than aborting the computation.
  trb_cut <- quantile(
    slot_players[["trb_per_g_nba"]],
    probs = 0.4,
    na.rm = TRUE,
    names = FALSE
  )
  vorp_cut <- quantile(
    slot_players[["vorp_per_g"]],
    probs = 0.3,
    na.rm = TRUE,
    names = FALSE
  )

  # playing less than half the seasons since drafted makes you a bust
  slot_players |>
    filter(
      (
        pra_per_g < pra_cut &
          ast_per_g_nba < ast_cut &
          trb_per_g_nba < trb_cut &
          pts_per_g_nba < pts_cut &
          vorp_per_g < vorp_cut
      ) |
        seasons < 1 / 2 * (current_year - year)
    )
}
# First overall: players who meet the not-bust criteria.
df_pick_1 <- is_not_bust(
  pick_number = 1,
  df_top_players = df_top_players,
  df2 = df2
)

# First overall: players flagged as busts.
df_pick_1_bust <- is_bust(
  pick_number = 1,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_1
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 John Wall              33            36 91.7%         116          182 63.7%  
## 2 Kyrie Irving            0             0 0%             26           39 66.7%  
## 3 Anthony Davis          96            98 98.0%         152          174 87.4%  
## 4 Karl-Anthony T…        22            24 91.7%          87          121 71.9%  
## 5 Ben Simmons            56            61 91.8%         159          220 72.3%  
## 6 Zion Williamson        72            79 91.1%         247          313 78.9%  
## 7 Anthony Edwards        27            27 100.0%         89          129 69.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the first-overall picks returned by is_bust().
df_pick_1_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Anthony Bennett        53            58 91.4%         100          140 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Second overall: split into not-bust and bust selections.
df_pick_2 <- is_not_bust(
  pick_number = 2,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_2_bust <- is_bust(
  pick_number = 2,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_2
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 D'Angelo Russe…         4             4 100.0%         70          110 63.6%  
## 2 Brandon Ingram         17            17 100.0%         69          117 59.0%  
## 3 Lonzo Ball             37            40 92.5%          94          120 78.3%  
## 4 Ja Morant              28            31 90.3%         160          264 60.6%  
## 5 Chet Holmgren          57            57 100.0%        105          125 84.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the second-overall picks returned by is_bust().
df_pick_2_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Derrick Willia…        56            60 93.3%         135          188 71.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Third overall: split into not-bust and bust selections.
df_pick_3 <- is_not_bust(
  pick_number = 3,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_3_bust <- is_bust(
  pick_number = 3,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_3
## # A tibble: 4 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Bradley Beal        18            20 90.0%          89          137 65.0%  
## 2 Joel Embiid         30            30 100.0%         80           99 80.8%  
## 3 Jayson Tatum        18            21 85.7%          79          126 62.7%  
## 4 Evan Mobley         63            66 95.5%         113          144 78.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the third-overall picks returned by is_bust().
df_pick_3_bust
## # A tibble: 1 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Jahlil Okafor        64            67 95.5%         213          270 78.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Fourth overall: split into not-bust and bust selections.
df_pick_4 <- is_not_bust(
  pick_number = 4,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_4_bust <- is_bust(
  pick_number = 4,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_4
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Aaron Gordon           54            56 96.4%         137          198 69.2%  
## 2 Jaren Jackson …        31            31 100.0%         61           93 65.6%  
## 3 Scottie Barnes         19            21 90.5%          61           89 68.5%  
## 4 Keegan Murray          63            67 94.0%         196          277 70.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the fourth-overall picks returned by is_bust().
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Fifth overall: split into not-bust and bust selections.
df_pick_5 <- is_not_bust(
  pick_number = 5,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_5_bust <- is_bust(
  pick_number = 5,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_5
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 DeMarcus Cousi…        53            57 93.0%         144          189 76.2%  
## 2 De'Aaron Fox           20            21 95.2%         131          203 64.5%  
## 3 Trae Young              0             0 0%            105          201 52.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the fifth-overall picks returned by is_bust().
df_pick_5_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Thomas Robinson        70            83 84.3%         169          262 64.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Sixth overall: split into not-bust and bust selections.
df_pick_6 <- is_not_bust(
  pick_number = 6,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_6_bust <- is_bust(
  pick_number = 6,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_6
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Damian Lillard        13            17 76.5%          98          169 58.0%  
## 2 Nerlens Noel          48            50 96.0%          76           99 76.8%  
## 3 Marcus Smart          16            18 88.9%          78          110 70.9%  
## 4 Buddy Hield           18            22 81.8%         119          178 66.9%  
## 5 Onyeka Okongwu        58            61 95.1%         135          186 72.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the sixth-overall picks returned by is_bust().
df_pick_6_bust
## # A tibble: 1 × 55
##   player    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>         <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ekpe Udoh        30            32 93.8%          78          109 71.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Seventh overall: split into not-bust and bust selections.
df_pick_7 <- is_not_bust(
  pick_number = 7,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_7_bust <- is_bust(
  pick_number = 7,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_7
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Julius Randle          37            40 92.5%         132          197 67.0%  
## 2 Jamal Murray           18            19 94.7%          77          111 69.4%  
## 3 Lauri Markkanen        20            24 83.3%          65          100 65.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the seventh-overall picks returned by is_bust().
df_pick_7_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ben McLemore        44            45 97.8%          90          126 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Eighth overall: split into not-bust and bust selections.
df_pick_8 <- is_not_bust(
  pick_number = 8,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_8_bust <- is_bust(
  pick_number = 8,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_8
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Al-Farouq Aminu        46            48 95.8%         112          173 64.7%  
## 2 Kentavious Cal…        15            16 93.8%          63           94 67.0%  
## 3 Franz Wagner           11            11 100.0%         63           93 67.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the eighth-overall picks returned by is_bust().
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Ninth overall: split into not-bust and bust selections.
df_pick_9 <- is_not_bust(
  pick_number = 9,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_9_bust <- is_bust(
  pick_number = 9,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_9
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Gordon Hayward        19            20 95.0%          89          128 69.5%  
## 2 Kemba Walker           3             3 100.0%        115          196 58.7%  
## 3 Andre Drummond        80            89 89.9%         130          185 70.3%  
## 4 Trey Burke             9             9 100.0%         67          105 63.8%  
## 5 Jakob Poeltl          32            34 94.1%         199          284 70.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the ninth-overall picks returned by is_bust().
df_pick_9_bust
## # A tibble: 1 × 55
##   player     dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>          <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kevin Knox        18            20 90.0%          65           99 65.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Tenth overall: split into not-bust and bust selections.
df_pick_10 <- is_not_bust(
  pick_number = 10,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_10_bust <- is_bust(
  pick_number = 10,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_10
## # A tibble: 5 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Paul George          18            22 81.8%          70          106 66.0%  
## 2 CJ McCollum           3             3 100.0%         34           63 54.0%  
## 3 Elfrid Payton        21            24 87.5%         169          247 68.4%  
## 4 Mikal Bridges        35            42 83.3%         109          161 67.7%  
## 5 Jalen Smith          49            52 94.2%         114          158 72.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the tenth-overall picks returned by is_bust().
df_pick_10_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ziaire Williams        10            11 90.9%          26           49 53.1%  
## 2 Johnny Davis           16            19 84.2%          89          143 62.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Eleventh overall: split into not-bust and bust selections.
df_pick_11 <- is_not_bust(
  pick_number = 11,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_11_bust <- is_bust(
  pick_number = 11,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_11
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Klay Thompson           8             8 100.0%         66          110 60.0%  
## 2 Myles Turner           11            13 84.6%          40           54 74.1%  
## 3 Domantas Sabon…        22            24 91.7%         157          214 73.4%  
## 4 Shai Gilgeous-…        11            11 100.0%        108          182 59.3%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the eleventh-overall picks returned by is_bust().
df_pick_11_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 James Bouknight        12            12 100.0%         52           79 65.8%  
## 2 Jett Howard             6             6 100.0%         29           47 61.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Twelfth overall: split into not-bust and bust selections.
df_pick_12 <- is_not_bust(
  pick_number = 12,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_12_bust <- is_bust(
  pick_number = 12,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_12
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Steven Adams           29            33 87.9%          85          129 65.9%  
## 2 Miles Bridges          30            35 85.7%          84          128 65.6%  
## 3 Tyrese Halibur…         7             8 87.5%          46           62 74.2%  
## 4 Jalen Williams         25            27 92.6%         124          186 66.7%  
## 5 Dereck Lively …        54            55 98.2%          74           96 77.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the twelfth-overall picks returned by is_bust().
df_pick_12_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Xavier Henry        17            17 100.0%         60           90 66.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Thirteenth overall: split into not-bust and bust selections.
df_pick_13 <- is_not_bust(
  pick_number = 13,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_13_bust <- is_bust(
  pick_number = 13,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_13
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ed Davis               26            27 96.3%          42           50 84.0%  
## 2 Kelly Olynyk           25            28 89.3%         152          212 71.7%  
## 3 Zach LaVine            21            25 84.0%          51           90 56.7%  
## 4 Devin Booker            8             9 88.9%          42           59 71.2%  
## 5 Donovan Mitche…         9            13 69.2%          64          116 55.2%  
## 6 Tyler Herro             4             5 80.0%          56           84 66.7%  
## 7 Jalen Duren            70            76 92.1%         111          152 73.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Print the rows returned by is_bust() for the thirteenth pick.
df_pick_13_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kendall Marsha…         0             0 0%             35           53 66.0%  
## 2 Jerome Robinson        12            13 92.3%          98          157 62.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
# Build the two pick-14 tables with the is_not_bust() / is_bust() helpers,
# which are defined outside this section. Presumably they look up pick-14
# players from df_top_players / df_bottom_players in the merged stats table
# df2 -- TODO confirm against the helper definitions.
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)

# Print the non-bust rows for this pick.
df_pick_14
## # A tibble: 4 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Marcus Morris        31            33 93.9%         114          147 77.6%  
## 2 T.J. Warren          37            37 100.0%        192          251 76.5%  
## 3 Cameron Payne         3             3 100.0%         53           87 60.9%  
## 4 Bam Adebayo          99           105 94.3%         138          185 74.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Print the rows returned by is_bust() for the fourteenth pick.
df_pick_14_bust
## # A tibble: 1 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Stack the per-pick tables for picks 1-14 into two combined tables:
# df_good (rows from is_not_bust()) and df_busts (rows from is_bust()).

df_good <- bind_rows(
  df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5,
  df_pick_6, df_pick_7, df_pick_8, df_pick_9, df_pick_10,
  df_pick_11, df_pick_12, df_pick_13, df_pick_14
)

df_busts <- bind_rows(
  df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust,
  df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust,
  df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust,
  df_pick_13_bust, df_pick_14_bust
)

# Show the first 20 rows of the combined good-value table.
df_good |> print(n = 20)
## # A tibble: 64 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 John Wall             33            36 91.7%         116          182 63.7%  
##  2 Kyrie Irving           0             0 0%             26           39 66.7%  
##  3 Anthony Davis         96            98 98.0%         152          174 87.4%  
##  4 Karl-Anthony …        22            24 91.7%          87          121 71.9%  
##  5 Ben Simmons           56            61 91.8%         159          220 72.3%  
##  6 Zion Williams…        72            79 91.1%         247          313 78.9%  
##  7 Anthony Edwar…        27            27 100.0%         89          129 69.0%  
##  8 D'Angelo Russ…         4             4 100.0%         70          110 63.6%  
##  9 Brandon Ingram        17            17 100.0%         69          117 59.0%  
## 10 Lonzo Ball            37            40 92.5%          94          120 78.3%  
## 11 Ja Morant             28            31 90.3%         160          264 60.6%  
## 12 Chet Holmgren         57            57 100.0%        105          125 84.0%  
## 13 Bradley Beal          18            20 90.0%          89          137 65.0%  
## 14 Joel Embiid           30            30 100.0%         80           99 80.8%  
## 15 Jayson Tatum          18            21 85.7%          79          126 62.7%  
## 16 Evan Mobley           63            66 95.5%         113          144 78.5%  
## 17 Aaron Gordon          54            56 96.4%         137          198 69.2%  
## 18 Jaren Jackson…        31            31 100.0%         61           93 65.6%  
## 19 Scottie Barnes        19            21 90.5%          61           89 68.5%  
## 20 Keegan Murray         63            67 94.0%         196          277 70.8%  
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
# List every player name in df_good (the tibble print above shows only the
# first 20 rows and abbreviates long names).
df_good |> pull(player)
##  [1] "John Wall"                "Kyrie Irving"            
##  [3] "Anthony Davis"            "Karl-Anthony Towns"      
##  [5] "Ben Simmons"              "Zion Williamson"         
##  [7] "Anthony Edwards"          "D'Angelo Russell"        
##  [9] "Brandon Ingram"           "Lonzo Ball"              
## [11] "Ja Morant"                "Chet Holmgren"           
## [13] "Bradley Beal"             "Joel Embiid"             
## [15] "Jayson Tatum"             "Evan Mobley"             
## [17] "Aaron Gordon"             "Jaren Jackson Jr."       
## [19] "Scottie Barnes"           "Keegan Murray"           
## [21] "DeMarcus Cousins"         "De'Aaron Fox"            
## [23] "Trae Young"               "Damian Lillard"          
## [25] "Nerlens Noel"             "Marcus Smart"            
## [27] "Buddy Hield"              "Onyeka Okongwu"          
## [29] "Julius Randle"            "Jamal Murray"            
## [31] "Lauri Markkanen"          "Al-Farouq Aminu"         
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"            
## [35] "Gordon Hayward"           "Kemba Walker"            
## [37] "Andre Drummond"           "Trey Burke"              
## [39] "Jakob Poeltl"             "Paul George"             
## [41] "CJ McCollum"              "Elfrid Payton"           
## [43] "Mikal Bridges"            "Jalen Smith"             
## [45] "Klay Thompson"            "Myles Turner"            
## [47] "Domantas Sabonis"         "Shai Gilgeous-Alexander" 
## [49] "Steven Adams"             "Miles Bridges"           
## [51] "Tyrese Haliburton"        "Jalen Williams"          
## [53] "Dereck Lively II"         "Ed Davis"                
## [55] "Kelly Olynyk"             "Zach LaVine"             
## [57] "Devin Booker"             "Donovan Mitchell"        
## [59] "Tyler Herro"              "Jalen Duren"             
## [61] "Marcus Morris"            "T.J. Warren"             
## [63] "Cameron Payne"            "Bam Adebayo"
# Keep the good-value player names as a standalone vector.
good_list <- pull(df_good, player)


# List every player name in the combined bust table.
pull(df_busts, player)
##  [1] "Anthony Bennett"  "Derrick Williams" "Jahlil Okafor"    "Thomas Robinson" 
##  [5] "Ekpe Udoh"        "Ben McLemore"     "Kevin Knox"       "Ziaire Williams" 
##  [9] "Johnny Davis"     "James Bouknight"  "Jett Howard"      "Xavier Henry"    
## [13] "Kendall Marshall" "Jerome Robinson"  "Romeo Langford"
# Keep the bust player names as a standalone vector.
bust_list <- pull(df_busts, player)
# College 2PT vs 3PT makes per game for the good-value picks, one green point
# per player with a repelled name label (small text, up to 20 overlaps kept).
plot_good <- df_good |>
  ggplot(aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "green", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 1.5, max.overlaps = 20) +
  labs(
    title = "CBB Shot Selection for Good Value NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()

# Same axes for the busts, drawn in red with larger labels.
plot_busts <- df_busts |>
  ggplot(aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "red", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 3) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Busts",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()


# Every player in df2 as a faint grey point, overlaid with the bust (red) and
# good-value (green) subsets. The constant strings mapped to `color` inside
# aes() become the legend keys that scale_color_manual() assigns colours to.
plot_combined <- df2 |>
  ggplot(aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
  geom_point(
    aes(color = "Bad value"),
    data = df_busts, size = 4, alpha = 0.5
  ) +
  geom_point(
    aes(color = "Good value"),
    data = df_good, size = 4, alpha = 0.5
  ) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game",
    color = "Value"
  ) +
  scale_color_manual(
    values = c(
      "Bad value" = "red",
      "Good value" = "green",
      "Average value" = "grey"
    )
  ) +
  theme_bw()

# Render the three plots.
plot_combined

plot_busts

plot_good

Principal Component Analysis of college stats, for grouping/covariance

library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preparing Data

# List the columns available in df2 before picking the college-stat subset
# used for the PCA.
colnames(df2)
##  [1] "player"            "dunk_made"         "dunk_attempts"    
##  [4] "dunk_pct"          "rim_made"          "rim_attempts"     
##  [7] "rim_pct"           "rim_asted"         "other2pt_made"    
## [10] "other2pt_attempts" "other2pt_pct"      "other2pt_asted"   
## [13] "3pt_tot"           "3pt_pct"           "3pt_asted"        
## [16] "games"             "mp_per_g_college"  "fg_per_g"         
## [19] "fga_per_g"         "fg_pct_college"    "fg2_per_g"        
## [22] "fg2a_per_g"        "fg2_pct"           "fg3_per_g"        
## [25] "fg3a_per_g"        "fg3_pct_college"   "ft_per_g"         
## [28] "fta_per_g"         "ft_pct_college"    "orb_per_g"        
## [31] "drb_per_g"         "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [37] "pts_per_g_college" "pick_overall"      "college_name"     
## [40] "seasons"           "g"                 "fg_pct_nba"       
## [43] "fg3_pct_nba"       "ft_pct_nba"        "mp_per_g_nba"     
## [46] "pts_per_g_nba"     "trb_per_g_nba"     "ast_per_g_nba"    
## [49] "ws"                "ws_per_48"         "bpm"              
## [52] "vorp"              "year"              "pra_per_g"        
## [55] "vorp_per_g"
# Use player names as row names so later outputs are labelled by player.
df3 <- column_to_rownames(df2, var = "player")

# College statistics used for the PCA. The shot-chart percentage columns are
# stored as text (e.g. dunk_pct "91.7%"), so they are parsed and divided by
# 100 to give proportions on a 0-1 scale. fg3_pct_per_g is the 3PT make rate
# computed from the per-game makes and attempts.
df_cbb <- df3 |>
  select(
    dunk_made, dunk_attempts, dunk_pct,
    rim_made, rim_attempts, rim_pct, rim_asted,
    other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted,
    fg2_pct, fg3_per_g, fg3a_per_g, `3pt_asted`,
    games, ft_per_g, fta_per_g, ast_per_g_college,
    orb_per_g, drb_per_g, stl_per_g, blk_per_g, tov_per_g,
    pts_per_g_college
  ) |>
  rename(fg3_asted = `3pt_asted`) |>
  mutate(
    across(
      c(
        dunk_pct, rim_pct, rim_asted,
        other2pt_pct, other2pt_asted, fg3_asted
      ),
      \(pct_text) parse_number(pct_text) / 100
    ),
    fg3_pct_per_g = fg3_per_g / fg3a_per_g
  )

# Replace missing 3PT make rates with 0 (presumably the 0 / 0 = NaN cases for
# players with no 3PT attempts -- confirm), then place the column next to
# the other 3PT columns.
df_cbb <- df_cbb |>
  mutate(fg3_pct_per_g = coalesce(fg3_pct_per_g, 0))

df_cbb <- df_cbb |>
  relocate(fg3_pct_per_g, .after = fg3_asted)

#' Convert a total to a per-game value.
#'
#' @param x Numeric vector of totals.
#' @param games Numeric vector (or scalar) of games played; divided
#'   element-wise into `x`.
#' @return `x / games`.
to_per_game <- function(x, games) {
  x / games
}

# Put the shot-chart count columns on a per-game basis by dividing each one
# by the player's `games` value.
df_cbb <- df_cbb |>
  mutate(
    across(
      .cols = c(
        dunk_made, dunk_attempts,
        rim_made, rim_attempts,
        other2pt_made, other2pt_attempts
      ),
      .fns = \(total) to_per_game(total, games)
    )
  )

# Number of missing values in each column.
colSums(is.na(df_cbb))
##         dunk_made     dunk_attempts          dunk_pct          rim_made 
##                 0                 0                 0                 0 
##      rim_attempts           rim_pct         rim_asted     other2pt_made 
##                 0                 0                 0                 0 
## other2pt_attempts      other2pt_pct    other2pt_asted           fg2_pct 
##                 0                 0                 0                 0 
##         fg3_per_g        fg3a_per_g         fg3_asted     fg3_pct_per_g 
##                 0                 0                 0                 0 
##             games          ft_per_g         fta_per_g ast_per_g_college 
##                 0                 0                 0                 0 
##         orb_per_g         drb_per_g         stl_per_g         blk_per_g 
##                 0                 0                 0                 0 
##         tov_per_g pts_per_g_college 
##                 0                 0

Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r

# Standardize every statistic except `games` with scale() and store the
# result as a tibble.
df_cbb_scaled <- df_cbb |>
  select(-games) |>
  scale() |>
  as_tibble()
df_cbb_scaled
## # A tibble: 165 × 25
##    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
##        <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>     <dbl>
##  1     0.338        0.359    0.125    0.886        1.15    -0.582   -0.936 
##  2    -0.789       -0.813    0.332   -0.925       -1.11     1.48    -1.47  
##  3     1.25         1.23     0.325    1.46         1.11     1.46     0.662 
##  4     0.599        0.655    0.0208   0.478        0.382    0.591    0.733 
##  5     1.13         1.15     0.215    1.44         1.18     1.16     0.633 
##  6     0.246        0.233    0.270    0.0605      -0.0262   0.521   -0.0688
##  7    -0.560       -0.589    0.387    0.233        0.367   -0.443    0.0432
##  8     0.102        0.0619   0.408   -0.246       -0.174   -0.443    0.0668
##  9    -0.635       -0.661    0.353   -0.687       -0.754    0.228    0.615 
## 10    -0.619       -0.573   -0.560   -0.839       -0.861   -0.261   -0.623 
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## #   fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## #   stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# NOTE(review): the two lines below are left disabled. df_cbb has no `player`
# column at this point (the names were moved to row names by
# column_to_rownames()), so they likely would not work as written -- confirm
# before re-enabling.
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)

# Pairwise correlation matrix of the standardized college statistics, then a
# check of which variables it contains.
corr_matrix <- cor(df_cbb_scaled)
colnames(corr_matrix)
##  [1] "dunk_made"         "dunk_attempts"     "dunk_pct"         
##  [4] "rim_made"          "rim_attempts"      "rim_pct"          
##  [7] "rim_asted"         "other2pt_made"     "other2pt_attempts"
## [10] "other2pt_pct"      "other2pt_asted"    "fg2_pct"          
## [13] "fg3_per_g"         "fg3a_per_g"        "fg3_asted"        
## [16] "fg3_pct_per_g"     "ft_per_g"          "fta_per_g"        
## [19] "ast_per_g_college" "orb_per_g"         "drb_per_g"        
## [22] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [25] "pts_per_g_college"
# Full correlation heatmap with default ordering.
ggcorrplot(corr_matrix, method = "square")

# Lower-triangle heatmap with hc.order = TRUE (hierarchical-clustering
# ordering), smaller axis text and a title.
ggcorrplot(
  corr_matrix,
  method = "square",
  hc.order = TRUE,
  type = "lower",
  tl.cex = 7,
  title = "Correlations between different college statistics"
)

Guides: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/

K-means clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f

# Shuffle the rows of df_cbb: sample_frac(1, replace = FALSE) draws every row
# exactly once, in random order.
# NOTE(review): no set.seed() call is visible in this section, so the row
# order (and the "first" rows shown by head() and later summaries) may differ
# between runs -- confirm whether a seed is set earlier in the file.
# dplyr documents sample_frac() as superseded by slice_sample(prop = 1).
df_cbb.sample <- df_cbb |> sample_frac(1, replace = FALSE)
head(df_cbb.sample)
##                   dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Anthony Bennett   1.5142857     1.6571429    0.914 2.857143     4.000000
## Marquese Chriss   1.5294118     1.9117647    0.800 3.000000     4.264706
## Patrick Patterson 0.6701031     0.7216495    0.929 1.402062     1.701031
## Jaren Jackson Jr. 0.8857143     0.8857143    1.000 1.742857     2.657143
## Cason Wallace     0.3437500     0.3437500    1.000 1.625000     2.281250
## Bradley Beal      0.4864865     0.5405405    0.900 2.405405     3.702703
##                   rim_pct rim_asted other2pt_made other2pt_attempts
## Anthony Bennett     0.714     0.620     1.6000000          3.685714
## Marquese Chriss     0.703     0.529     1.6470588          3.911765
## Patrick Patterson   0.824     0.647     0.5154639          1.309278
## Jaren Jackson Jr.   0.656     0.459     0.5714286          1.228571
## Cason Wallace       0.712     0.096     1.3437500          3.500000
## Bradley Beal        0.650     0.348     0.6216216          1.891892
##                   other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Anthony Bennett          0.434          0.768   0.587       1.0        2.7
## Marquese Chriss          0.421          0.589   0.568       0.6        1.8
## Patrick Patterson        0.394          0.600   0.604       0.2        0.8
## Jaren Jackson Jr.        0.465          0.600   0.596       1.1        2.7
## Cason Wallace            0.384          0.093   0.514       1.4        4.0
## Bradley Beal             0.329          0.304   0.541       1.7        5.0
##                   fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Anthony Bennett       0.972     0.3703704    35      3.5       5.1
## Marquese Chriss       0.952     0.3333333    34      2.6       3.8
## Patrick Patterson     0.957     0.2500000    97      3.2       4.3
## Jaren Jackson Jr.     0.974     0.4074074    35      3.0       3.8
## Cason Wallace         0.818     0.3500000    32      1.7       2.2
## Bradley Beal          0.905     0.3400000    37      3.6       4.7
##                   ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Anthony Bennett                 1.0       2.5       5.7       0.7       1.2
## Marquese Chriss                 0.8       2.5       2.9       0.9       1.6
## Patrick Patterson               1.5       2.9       5.3       0.7       1.6
## Jaren Jackson Jr.               1.1       1.5       4.3       0.6       3.0
## Cason Wallace                   4.3       0.9       2.8       2.0       0.5
## Bradley Beal                    2.2       1.4       5.4       1.4       0.8
##                   tov_per_g pts_per_g_college
## Anthony Bennett         1.9              16.1
## Marquese Chriss         2.0              13.7
## Patrick Patterson       1.6              16.1
## Jaren Jackson Jr.       1.8              10.9
## Cason Wallace           2.1              11.7
## Bradley Beal            2.1              14.8
# FactoMineR PCA on the college statistics (games played excluded), with
# variables scaled to unit variance and the automatic plots suppressed.
df_cbb.pca <- df_cbb.sample |>
  select(-games) |>
  PCA(scale.unit = TRUE, graph = FALSE)

# Eigenvalue plot with the percentage labels shown on each bar.
fviz_eig(
  df_cbb.pca,
  addlabels = TRUE,
  main = "Statistics Represented in Lower Dimensional Components"
)

# Variable plot, coloured by cos2 on a light-blue-to-black gradient.
fviz_pca_var(
  df_cbb.pca,
  col.var = "cos2",
  gradient.cols = c("lightblue", "black"),
  repel = TRUE
)

# NOTE(review): `var` shares its name with stats::var() and is not used in
# the visible code -- confirm whether a later section relies on it.
var <- get_pca_var(df_cbb.pca)

# cos2 of each variable summed over dimensions 1 and 2.
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) +
  labs(title = "Quality of Representation to PCA Dimensions 1 and 2")

# Individual players plotted on the PCA dimensions, with small labels.
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
  labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")

# Eigenvalues plus the leading individual and variable coordinates.
summary(df_cbb.pca)
## 
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               7.916   5.341   2.094   1.495   1.261   1.201   1.002
## % of var.             31.663  21.364   8.376   5.978   5.045   4.803   4.008
## Cumulative % of var.  31.663  53.027  61.403  67.381  72.426  77.229  81.237
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.967   0.751   0.556   0.446   0.420   0.327   0.267
## % of var.              3.867   3.006   2.225   1.786   1.679   1.310   1.067
## Cumulative % of var.  85.104  88.110  90.335  92.121  93.800  95.110  96.177
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.258   0.213   0.164   0.141   0.072   0.062   0.024
## % of var.              1.032   0.850   0.655   0.563   0.286   0.249   0.097
## Cumulative % of var.  97.209  98.059  98.714  99.277  99.564  99.813  99.910
##                       Dim.22  Dim.23  Dim.24  Dim.25
## Variance               0.011   0.006   0.004   0.002
## % of var.              0.046   0.023   0.014   0.007
## Cumulative % of var.  99.955  99.979  99.993 100.000
## 
## Individuals (the 10 first)
##                       Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## Anthony Bennett   |  4.389 |  3.010  0.694  0.470 |  0.997  0.113  0.052 |
## Marquese Chriss   |  4.114 |  2.704  0.560  0.432 |  0.402  0.018  0.010 |
## Patrick Patterson |  4.401 |  2.766  0.586  0.395 | -1.640  0.305  0.139 |
## Jaren Jackson Jr. |  4.049 |  1.503  0.173  0.138 | -1.816  0.374  0.201 |
## Cason Wallace     |  4.400 | -2.425  0.450  0.304 | -0.572  0.037  0.017 |
## Bradley Beal      |  2.297 | -0.965  0.071  0.176 |  0.450  0.023  0.038 |
## Markelle Fultz    |  7.797 | -2.386  0.436  0.094 |  6.004  4.091  0.593 |
## Evan Turner       |  4.173 | -1.376  0.145  0.109 |  0.802  0.073  0.037 |
## Steven Adams      |  6.140 |  3.716  1.057  0.366 | -2.840  0.915  0.214 |
## John Henson       |  6.097 |  3.263  0.815  0.286 | -2.676  0.812  0.193 |
##                    Dim.3    ctr   cos2  
## Anthony Bennett    2.370  1.626  0.292 |
## Marquese Chriss    1.318  0.503  0.103 |
## Patrick Patterson  0.330  0.031  0.006 |
## Jaren Jackson Jr.  0.874  0.221  0.047 |
## Cason Wallace     -0.474  0.065  0.012 |
## Bradley Beal      -0.187  0.010  0.007 |
## Markelle Fultz     2.275  1.498  0.085 |
## Evan Turner       -1.019  0.301  0.060 |
## Steven Adams      -2.476  1.774  0.163 |
## John Henson       -0.607  0.107  0.010 |
## 
## Variables (the 10 first)
##                      Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## dunk_made         |  0.835  8.802  0.697 |  0.336  2.117  0.113 | -0.025  0.029
## dunk_attempts     |  0.829  8.675  0.687 |  0.338  2.140  0.114 | -0.025  0.030
## dunk_pct          |  0.334  1.412  0.112 | -0.120  0.269  0.014 |  0.228  2.485
## rim_made          |  0.584  4.305  0.341 |  0.672  8.468  0.452 | -0.088  0.370
## rim_attempts      |  0.451  2.571  0.203 |  0.740 10.243  0.547 | -0.093  0.410
## rim_pct           |  0.724  6.625  0.524 | -0.148  0.409  0.022 | -0.001  0.000
## rim_asted         |  0.767  7.441  0.589 | -0.354  2.351  0.126 |  0.182  1.578
## other2pt_made     |  0.106  0.141  0.011 |  0.601  6.772  0.362 |  0.560 14.964
## other2pt_attempts |  0.102  0.131  0.010 |  0.651  7.924  0.423 |  0.484 11.203
## other2pt_pct      |  0.007  0.001  0.000 | -0.045  0.039  0.002 |  0.359  6.140
##                     cos2  
## dunk_made          0.001 |
## dunk_attempts      0.001 |
## dunk_pct           0.052 |
## rim_made           0.008 |
## rim_attempts       0.009 |
## rim_pct            0.000 |
## rim_asted          0.033 |
## other2pt_made      0.313 |
## other2pt_attempts  0.235 |
## other2pt_pct       0.129 |
# Second PCA fit with base R's prcomp(); its component scores feed df_cluster
# below.
# `games` is dropped so this fit uses the same 25 statistics as the FactoMineR
# PCA and the scaled correlation data above, both of which exclude games
# played. Previously `games` was included here, giving a 26th component and
# making the two PCAs inconsistent.
# The argument is spelled `scale.` (prcomp()'s actual parameter name) instead
# of relying on partial matching of `scale`.
# NOTE(review): the printed summary and scores below come from the old fit and
# will change when the document is re-knit.
pca2 <- prcomp(df_cbb.sample |> select(-games), center = TRUE, scale. = TRUE)
summary(pca2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion  0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion  0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
##                           PC15    PC16    PC17    PC18    PC19    PC20   PC21
## Standard deviation     0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion  0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
##                          PC22    PC23    PC24    PC25    PC26
## Standard deviation     0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion  0.9991 0.99957 0.99980 0.99993 1.00000
# Take the scores on the first two principal components, flip their signs,
# and store them as a data frame (one row per player), presumably as input
# for the k-means step -- confirm against the clustering code.
df_cluster <- (-pca2$x[, 1:2]) |> as.data.frame()
df_cluster
##                                  PC1           PC2
## Anthony Bennett          -3.08683149  0.9658417837
## Marquese Chriss          -2.77746275  0.4521718014
## Patrick Patterson        -2.53963602 -2.1109010563
## Jaren Jackson Jr.        -1.49089242 -1.7016335898
## Cason Wallace             2.34104930 -0.2152815400
## Bradley Beal              0.88415187  0.6225122311
## Markelle Fultz            2.05054203  6.2356705555
## Evan Turner               1.49673522  0.4578015058
## Steven Adams             -3.66839570 -2.7680962536
## John Henson              -2.97155356 -3.1904829161
## Malik Monk                2.28778612  1.6797126865
## Johnny Davis              1.33669032 -0.1478916084
## Stanley Johnson           1.16931864  1.1296582673
## Ben Simmons              -3.55778734  6.3959006442
## Ochai Agbaji              1.65972813 -3.1665060860
## Alec Burks                0.74320461  2.0264013166
## Ja Morant                 2.49001353  3.1186017067
## Marcus Morris            -0.73895771 -2.6323130400
## Chris Duarte              1.92568686 -1.3515954340
## Trey Lyles               -2.26213434 -2.3189123450
## Wes Johnson              -1.50316126  1.3047269862
## Willie Cauley-Stein      -2.65175786 -4.0213075524
## Paolo Banchero           -0.71365922  2.7656057765
## Marcus Smart              2.59113529  1.9709358714
## Alex Len                 -3.60602390 -2.2253867363
## Jett Howard               2.91653599 -1.5725624278
## Greg Monroe              -0.55772278  0.6192425231
## Derrick Williams         -1.03830203  1.0304451116
## Jordan Hawkins            3.13721836 -2.6918367917
## Andrew Wiggins           -0.36542307  2.6025198229
## Anthony Edwards           1.40251473  2.6178813170
## CJ McCollum               3.90112719  0.5500261983
## Cole Aldrich             -2.78307370 -3.9300845772
## Brandon Knight            3.54151495  2.0093567331
## Kemba Walker              3.18777727  0.5896508288
## De'Andre Hunter           0.77514281 -2.0904369415
## Al-Farouq Aminu          -1.02295866  0.6557701582
## Cody Zeller              -2.02496946  0.5680230150
## Doug McDermott            1.01124840 -1.4023518631
## Bam Adebayo              -5.99686501  1.1625539440
## Dereck Lively II         -5.06532407 -4.9338352381
## Obi Toppin               -3.01791562 -0.9098400603
## Jamal Murray              2.37633667  1.7128111826
## Luke Kennard              2.76829877 -0.9772815337
## Keegan Murray            -1.59473589 -0.7127863270
## Justise Winslow           0.36808543  0.0140660045
## James Bouknight           1.52674532 -0.8368198759
## Jeremy Lamb               0.50899046 -2.6527408299
## Jalen Smith              -2.10897015 -1.8201712718
## Trey Burke                3.60982350  0.5620071724
## Gordon Hayward            0.68966882 -0.8690154013
## Zach Collins             -2.23468578 -2.2257336106
## Onyeka Okongwu           -5.79890613  2.6047417619
## Rui Hachimura            -0.89990828 -2.6695528608
## Jimmer Fredette           4.12885438 -0.1532925420
## Joel Embiid              -4.49751364 -0.3052975967
## Jalen Suggs               1.57680464  1.3826079667
## Trae Young                6.66738911  7.9875515113
## Denzel Valentine          3.08887719 -3.2154062834
## Shabazz Muhammad         -0.36507564  2.1338042145
## Devin Booker              1.28579904 -3.5063787159
## Michael Carter-Williams   2.91272861 -0.8684079705
## D'Angelo Russell          2.98270993  2.6314533501
## Thomas Robinson          -0.41276682 -2.4752367530
## Markieff Morris          -1.29699896 -4.0373945397
## Cameron Payne             4.48285978  1.3236954751
## Damian Lillard            4.29694802  0.3843340708
## Brandon Miller            2.00076132  1.9889065138
## Gradey Dick               1.44740283 -1.2927758040
## Terrence Ross             1.18817176 -2.5860223513
## Joshua Primo              1.55551858 -3.7707023835
## Buddy Hield               3.54777620 -1.4535258538
## Nerlens Noel             -5.45383914  0.7991793564
## Scottie Barnes            0.41005879 -0.0002825293
## Devin Vassell             0.72773722 -4.1046446591
## Davion Mitchell           3.51406172 -1.6468468321
## Dion Waiters              1.92316128 -2.9143303356
## Jakob Poeltl             -3.07463033 -1.0055357287
## Tyrese Haliburton         2.28374305 -2.7467761148
## Collin Sexton             2.52183153  3.9991906129
## Taurean Prince            1.44792106 -3.7467662602
## Jalen Williams            1.80913049 -1.6958213307
## Jaxson Hayes             -6.10602196 -1.9333235932
## Paul George               2.10747433 -0.1876406410
## Frank Kaminsky            0.20730366 -4.1181677131
## Jaylen Brown              0.86593793  2.2952001302
## Lauri Markkanen           0.18748839 -0.1368727294
## Shai Gilgeous-Alexander   2.04551424  2.3234644917
## Victor Oladipo            0.63903222 -2.5888631078
## Kelly Olynyk             -0.73492325 -3.8811181762
## Domantas Sabonis         -1.67699707 -1.2190984214
## Kevin Knox                0.63604976  0.7501807107
## Mo Bamba                 -5.12168936  0.6472841174
## Julius Randle            -2.20418418  3.0357101236
## Otto Porter Jr.          -0.04885497 -1.8356037020
## Nik Stauskas              3.13687982 -1.8802991525
## Patrick Williams          0.12807727 -1.3341060184
## Kendall Marshall          4.60320232 -1.8524037126
## Aaron Gordon             -2.71643469  0.5168844660
## Kira Lewis Jr.            3.26278428  0.0954317440
## Jabari Smith Jr.          1.62886867  1.4477810399
## Isaac Okoro              -0.42993503  0.2975331821
## Jaden Ivey                1.86477689 -0.0773337674
## Donovan Mitchell          3.15753551 -1.8102604526
## Franz Wagner              1.24385093 -2.2215940450
## Josh Jackson             -1.48957168  3.0475464620
## Derrick Favors           -5.01881131  0.5372658952
## Jabari Parker            -2.40112005  3.8466488862
## Taylor Hendricks         -1.15461505 -0.2239571117
## Meyers Leonard           -2.74024056 -3.6508169349
## Marvin Bagley III        -5.65427831  4.5448753922
## Cade Cunningham           2.90919016  4.4345298643
## T.J. Warren              -1.08912829  0.3669642309
## Evan Mobley              -4.26687599  2.6519044433
## De'Aaron Fox              0.87066774  3.3180112387
## Lonzo Ball                0.30972840 -0.0007053551
## Elfrid Payton             2.00506164  0.8904543832
## John Wall                 1.73494610  3.9455798842
## Klay Thompson             3.84131304 -0.2800845197
## Myles Turner             -1.86401362 -1.8030642874
## Jerome Robinson           3.05834301 -0.2586574385
## Anthony Davis            -6.73630616  1.2629548886
## Jalen Duren              -5.98165537  0.5776256581
## Tristan Thompson         -4.60556911  2.1556621458
## Noah Vonleh              -1.11130925  0.1751834144
## Jahlil Okafor            -5.47754196  2.7772955807
## Bennedict Mathurin        1.07663046 -1.0805356809
## Mikal Bridges             1.20162590 -3.5903451628
## Xavier Henry              1.24518982 -0.8915202286
## Ekpe Udoh                -2.87709283  2.0867754797
## Chet Holmgren            -4.15750678 -0.0605690899
## Coby White                3.35981962  0.9338304491
## DeMarcus Cousins         -4.42466023  2.5278733794
## Cam Reddish               4.14985479 -0.0842589880
## Michael Kidd-Gilchrist   -1.49000302  0.5168492854
## Dennis Smith Jr.          2.43263222  4.0080211081
## Kentavious Caldwell-Pope  2.25315004 -0.8810443210
## Zach LaVine               1.60422247 -2.6481909219
## Karl-Anthony Towns       -2.08103954 -1.1196065654
## Jeremy Sochan            -0.86229364 -1.9652843807
## Ziaire Williams           1.78971475 -0.2486334187
## Wendell Carter Jr.       -3.36498747  0.7216499359
## Brandon Ingram            1.66287033  1.8607679236
## Andre Drummond           -5.95363392 -0.5511351054
## Austin Rivers             2.95329868  1.3791937733
## RJ Barrett                0.08315247  5.0528618008
## Aaron Nesmith             3.28346405 -1.6855751483
## Jonathan Isaac           -1.48927680 -0.4512240082
## Ben McLemore             -0.24239908  0.1870327074
## Deandre Ayton            -6.32878662  3.9424880892
## Romeo Langford            0.73466257  2.2444498661
## Jayson Tatum              0.78010288  2.6444271533
## Kyrie Irving              3.77122159  3.0033308087
## Moses Moody               1.46960252  1.4190061757
## Miles Bridges             0.51099544 -0.8957909493
## Kris Dunn                 2.69748838 -0.0951825819
## P.J. Washington          -0.33949853 -1.0835362435
## Tyler Herro               1.81846173 -0.4356215772
## Jarrett Culver            1.51221150 -0.0681535683
## Zion Williamson          -5.07447989  4.6504795504
## Anthony Black             1.20227004  1.7148572319
## Cameron Johnson           1.47106716 -2.1704255345
## Ed Davis                 -3.31971345 -3.1931177330
## Jarace Walker            -1.30148259 -1.2591517315
## Harrison Barnes           1.11071750 -0.4675508975
# Diagnostic plots for choosing the number of k-means clusters on
# `df_cluster`, one per criterion: within-cluster sum of squares ("wss"),
# average silhouette width, and the gap statistic.
fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "wss")

fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "silhouette")

fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "gap_stat")

# Fit k-means at three granularities (15, 10 and 5 clusters), each with 50
# random starts.
# NOTE(review): no seed is set in this chunk, so cluster numbering can differ
# between runs unless a seed is set earlier in the file -- confirm.
k <- 15
df_cbb.kmeans <- kmeans(x = df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(x = df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(x = df_cluster, centers = 5, nstart = 50)

# Show the 15-cluster fit (sizes, centers, assignments, sums of squares).
df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 9, 1, 8, 9, 8, 16, 7, 18, 25, 6, 18, 1, 14, 13, 12
## 
## Cluster means:
##           PC1        PC2
## 1  -5.6518151  0.2441231
## 2  -3.5577873  6.3959006
## 3  -5.2038873  3.2319150
## 4  -3.1225646 -3.5500581
## 5   2.6116359  3.9220472
## 6  -1.7554870 -1.6598201
## 7  -1.0651259  3.2120996
## 8   0.9023738 -3.0037819
## 9   1.1995276 -0.4390785
## 10 -3.1633860  0.7804591
## 11  1.5229678  1.9259630
## 12  6.6673891  7.9875515
## 13  3.6051291  0.4968607
## 14  3.0891886 -1.8536157
## 15 -1.0961484  0.4205491
## 
## Clustering vector:
##          Anthony Bennett          Marquese Chriss        Patrick Patterson 
##                       10                       10                        6 
##        Jaren Jackson Jr.            Cason Wallace             Bradley Beal 
##                        6                        9                        9 
##           Markelle Fultz              Evan Turner             Steven Adams 
##                        5                        9                        4 
##              John Henson               Malik Monk             Johnny Davis 
##                        4                       11                        9 
##          Stanley Johnson              Ben Simmons             Ochai Agbaji 
##                       11                        2                        8 
##               Alec Burks                Ja Morant            Marcus Morris 
##                       11                        5                        6 
##             Chris Duarte               Trey Lyles              Wes Johnson 
##                        9                        6                       15 
##      Willie Cauley-Stein           Paolo Banchero             Marcus Smart 
##                        4                        7                       11 
##                 Alex Len              Jett Howard              Greg Monroe 
##                        4                       14                       15 
##         Derrick Williams           Jordan Hawkins           Andrew Wiggins 
##                       15                       14                        7 
##          Anthony Edwards              CJ McCollum             Cole Aldrich 
##                       11                       13                        4 
##           Brandon Knight             Kemba Walker          De'Andre Hunter 
##                       13                       13                        8 
##          Al-Farouq Aminu              Cody Zeller           Doug McDermott 
##                       15                       15                        9 
##              Bam Adebayo         Dereck Lively II               Obi Toppin 
##                        1                        4                        6 
##             Jamal Murray             Luke Kennard            Keegan Murray 
##                       11                       14                        6 
##          Justise Winslow          James Bouknight              Jeremy Lamb 
##                        9                        9                        8 
##              Jalen Smith               Trey Burke           Gordon Hayward 
##                        6                       13                        9 
##             Zach Collins           Onyeka Okongwu            Rui Hachimura 
##                        6                        3                        6 
##          Jimmer Fredette              Joel Embiid              Jalen Suggs 
##                       13                        1                       11 
##               Trae Young         Denzel Valentine         Shabazz Muhammad 
##                       12                       14                        7 
##             Devin Booker  Michael Carter-Williams         D'Angelo Russell 
##                        8                       14                        5 
##          Thomas Robinson          Markieff Morris            Cameron Payne 
##                        8                        4                       13 
##           Damian Lillard           Brandon Miller              Gradey Dick 
##                       13                       11                        9 
##            Terrence Ross             Joshua Primo              Buddy Hield 
##                        8                        8                       14 
##             Nerlens Noel           Scottie Barnes            Devin Vassell 
##                        1                        9                        8 
##          Davion Mitchell             Dion Waiters             Jakob Poeltl 
##                       14                        8                        6 
##        Tyrese Haliburton            Collin Sexton           Taurean Prince 
##                       14                        5                        8 
##           Jalen Williams             Jaxson Hayes              Paul George 
##                       14                        1                        9 
##           Frank Kaminsky             Jaylen Brown          Lauri Markkanen 
##                        8                       11                        9 
##  Shai Gilgeous-Alexander           Victor Oladipo             Kelly Olynyk 
##                       11                        8                        8 
##         Domantas Sabonis               Kevin Knox                 Mo Bamba 
##                        6                        9                        1 
##            Julius Randle          Otto Porter Jr.             Nik Stauskas 
##                        7                        8                       14 
##         Patrick Williams         Kendall Marshall             Aaron Gordon 
##                        9                       14                       10 
##           Kira Lewis Jr.         Jabari Smith Jr.              Isaac Okoro 
##                       13                       11                       15 
##               Jaden Ivey         Donovan Mitchell             Franz Wagner 
##                        9                       14                        8 
##             Josh Jackson           Derrick Favors            Jabari Parker 
##                        7                        1                        7 
##         Taylor Hendricks           Meyers Leonard        Marvin Bagley III 
##                       15                        4                        3 
##          Cade Cunningham              T.J. Warren              Evan Mobley 
##                        5                       15                        3 
##             De'Aaron Fox               Lonzo Ball            Elfrid Payton 
##                       11                        9                       11 
##                John Wall            Klay Thompson             Myles Turner 
##                        5                       13                        6 
##          Jerome Robinson            Anthony Davis              Jalen Duren 
##                       13                        1                        1 
##         Tristan Thompson              Noah Vonleh            Jahlil Okafor 
##                        3                       15                        3 
##       Bennedict Mathurin            Mikal Bridges             Xavier Henry 
##                        9                        8                        9 
##                Ekpe Udoh            Chet Holmgren               Coby White 
##                       10                       10                       13 
##         DeMarcus Cousins              Cam Reddish   Michael Kidd-Gilchrist 
##                        3                       13                       15 
##         Dennis Smith Jr. Kentavious Caldwell-Pope              Zach LaVine 
##                        5                        9                        8 
##       Karl-Anthony Towns            Jeremy Sochan          Ziaire Williams 
##                        6                        6                        9 
##       Wendell Carter Jr.           Brandon Ingram           Andre Drummond 
##                       10                       11                        1 
##            Austin Rivers               RJ Barrett            Aaron Nesmith 
##                       13                        7                       14 
##           Jonathan Isaac             Ben McLemore            Deandre Ayton 
##                       15                       15                        3 
##           Romeo Langford             Jayson Tatum             Kyrie Irving 
##                       11                       11                        5 
##              Moses Moody            Miles Bridges                Kris Dunn 
##                       11                        9                       13 
##          P.J. Washington              Tyler Herro           Jarrett Culver 
##                        6                        9                        9 
##          Zion Williamson            Anthony Black          Cameron Johnson 
##                        3                       11                        8 
##                 Ed Davis            Jarace Walker          Harrison Barnes 
##                        4                        6                        9 
## 
## Within cluster sum of squares by cluster:
##  [1] 11.979596  0.000000 10.588637 13.335329 11.493769 15.484948 11.267129
##  [8] 19.035413 19.986620  4.094306 12.092124  0.000000 10.282343 10.612227
## [15]  5.400759
##  (between_SS / total_SS =  93.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the 15-cluster solution over `df_cluster` with repelled point labels
# and no cluster centers drawn; the y axis is reversed.
fviz_cluster(
  object = df_cbb.kmeans,
  data = df_cluster,
  labelsize = 4,
  pointsize = 1,
  show.clust.cent = FALSE,
  repel = TRUE,
  xlab = "Dimension 1",
  ylab = "Dimension 2",
  main = "Clustering with K-means"
) +
  scale_y_reverse()

# Named integer vectors of cluster labels (names are the row names of the
# clustered data) for the 15-, 10- and 5-cluster fits.
cluster_assignments <- df_cbb.kmeans$cluster
cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_assignments3 <- df_cbb.kmeans3$cluster

# One two-column tibble (name, pc_cluster) per fit. Built directly with
# `tibble()` instead of data.frame() -> as.tibble() -> rename() -> relocate():
# `as.tibble()` is deprecated since tibble 2.0.0, and constructing the columns
# under their final names in their final order makes the extra steps
# unnecessary. `unname()` keeps the label column a plain integer vector.
cluster_df15 <- tibble(
  name = names(cluster_assignments),
  pc_cluster = unname(cluster_assignments)
)
cluster_df10 <- tibble(
  name = names(cluster_assignments2),
  pc_cluster = unname(cluster_assignments2)
)
cluster_df5 <- tibble(
  name = names(cluster_assignments3),
  pc_cluster = unname(cluster_assignments3)
)

cluster_df15
## # A tibble: 165 × 2
##    name              pc_cluster
##    <chr>                  <int>
##  1 Anthony Bennett           10
##  2 Marquese Chriss           10
##  3 Patrick Patterson          6
##  4 Jaren Jackson Jr.          6
##  5 Cason Wallace              9
##  6 Bradley Beal               9
##  7 Markelle Fultz             5
##  8 Evan Turner                9
##  9 Steven Adams               4
## 10 John Henson                4
## # ℹ 155 more rows
# Tag every row of `df_cbb.sample` with its label from the 15-cluster fit and
# move that label in front of `dunk_made`, then preview the first rows.
df_cbb.sample[["group"]] <- df_cbb.kmeans[["cluster"]]
df_cbb.sample <- relocate(df_cbb.sample, group, .before = dunk_made)
head(df_cbb.sample)
##                   group dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Anthony Bennett      10 1.5142857     1.6571429    0.914 2.857143     4.000000
## Marquese Chriss      10 1.5294118     1.9117647    0.800 3.000000     4.264706
## Patrick Patterson     6 0.6701031     0.7216495    0.929 1.402062     1.701031
## Jaren Jackson Jr.     6 0.8857143     0.8857143    1.000 1.742857     2.657143
## Cason Wallace         9 0.3437500     0.3437500    1.000 1.625000     2.281250
## Bradley Beal          9 0.4864865     0.5405405    0.900 2.405405     3.702703
##                   rim_pct rim_asted other2pt_made other2pt_attempts
## Anthony Bennett     0.714     0.620     1.6000000          3.685714
## Marquese Chriss     0.703     0.529     1.6470588          3.911765
## Patrick Patterson   0.824     0.647     0.5154639          1.309278
## Jaren Jackson Jr.   0.656     0.459     0.5714286          1.228571
## Cason Wallace       0.712     0.096     1.3437500          3.500000
## Bradley Beal        0.650     0.348     0.6216216          1.891892
##                   other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Anthony Bennett          0.434          0.768   0.587       1.0        2.7
## Marquese Chriss          0.421          0.589   0.568       0.6        1.8
## Patrick Patterson        0.394          0.600   0.604       0.2        0.8
## Jaren Jackson Jr.        0.465          0.600   0.596       1.1        2.7
## Cason Wallace            0.384          0.093   0.514       1.4        4.0
## Bradley Beal             0.329          0.304   0.541       1.7        5.0
##                   fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Anthony Bennett       0.972     0.3703704    35      3.5       5.1
## Marquese Chriss       0.952     0.3333333    34      2.6       3.8
## Patrick Patterson     0.957     0.2500000    97      3.2       4.3
## Jaren Jackson Jr.     0.974     0.4074074    35      3.0       3.8
## Cason Wallace         0.818     0.3500000    32      1.7       2.2
## Bradley Beal          0.905     0.3400000    37      3.6       4.7
##                   ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Anthony Bennett                 1.0       2.5       5.7       0.7       1.2
## Marquese Chriss                 0.8       2.5       2.9       0.9       1.6
## Patrick Patterson               1.5       2.9       5.3       0.7       1.6
## Jaren Jackson Jr.               1.1       1.5       4.3       0.6       3.0
## Cason Wallace                   4.3       0.9       2.8       2.0       0.5
## Bradley Beal                    2.2       1.4       5.4       1.4       0.8
##                   tov_per_g pts_per_g_college
## Anthony Bennett         1.9              16.1
## Marquese Chriss         2.0              13.7
## Patrick Patterson       1.6              16.1
## Jaren Jackson Jr.       1.8              10.9
## Cason Wallace           2.1              11.7
## Bradley Beal            2.1              14.8
# Mean of every column within each cluster; print all 15 rows and every
# column without truncation.
df_cbb.sample |>
  group_by(group) |>
  summarize(across(.cols = everything(), .fns = mean)) |>
  print(n = 15, width = Inf)
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     2.09          2.20     0.955    3.54          4.56   0.781
##  2     2     1.70          1.85     0.918    4.82          6.67   0.723
##  3     3     2.06          2.18     0.946    4.94          6.46   0.764
##  4     4     0.745         0.791    0.943    1.45          1.98   0.747
##  5     5     0.387         0.428    0.770    2.70          4.35   0.624
##  6     6     0.660         0.716    0.915    1.94          2.65   0.735
##  7     7     1.17          1.28     0.915    3.34          5.06   0.662
##  8     8     0.289         0.323    0.916    1.10          1.63   0.683
##  9     9     0.435         0.480    0.916    1.63          2.44   0.671
## 10    10     1.43          1.55     0.930    3.03          4.19   0.725
## 11    11     0.468         0.514    0.912    2.30          3.65   0.633
## 12    12     0             0        0        3.28          6.28   0.522
## 13    13     0.108         0.125    0.899    1.18          1.98   0.602
## 14    14     0.137         0.160    0.802    0.865         1.40   0.623
## 15    15     0.823         0.890    0.929    2.45          3.62   0.678
##    rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
##        <dbl>         <dbl>             <dbl>        <dbl>          <dbl>   <dbl>
##  1     0.647         0.922              2.70        0.344          0.498   0.618
##  2     0.484         1.67               4.88        0.342          0.309   0.561
##  3     0.546         1.62               3.88        0.424          0.376   0.63 
##  4     0.698         0.608              1.57        0.357          0.626   0.580
##  5     0.221         1.52               4.03        0.37           0.083   0.509
##  6     0.583         0.884              2.10        0.423          0.474   0.595
##  7     0.440         1.90               5.17        0.365          0.291   0.511
##  8     0.492         0.647              1.58        0.401          0.346   0.548
##  9     0.404         0.909              2.41        0.375          0.262   0.525
## 10     0.543         1.47               3.73        0.401          0.463   0.583
## 11     0.309         1.31               3.56        0.362          0.161   0.501
## 12     0.114         1.19               2.78        0.427          0.026   0.493
## 13     0.238         0.887              2.31        0.380          0.186   0.476
## 14     0.279         0.573              1.43        0.397          0.136   0.500
## 15     0.478         0.875              2.38        0.357          0.359   0.564
##    fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##        <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1    0.0667      0.278     0.317        0.0549  32.3     2.67      4.22
##  2    0           0.1       1            0       33       6         9   
##  3    0.262       0.812     0.578        0.168   34.2     3.76      6.05
##  4    0.0444      0.211     0.444        0.0648  76.2     1.66      2.72
##  5    1.75        4.56      0.479        0.379   33.1     4.84      6.25
##  6    0.5         1.44      0.847        0.340   60.4     2.72      3.8 
##  7    1.1         3.21      0.891        0.331   36.3     4.13      5.86
##  8    1.17        3.12      0.876        0.405   80.9     1.91      2.54
##  9    1.54        4.21      0.784        0.362   53.4     2.87      3.72
## 10    0.667       1.82      0.921        0.356   35.3     2.8       4.28
## 11    1.52        4.29      0.731        0.342   41.6     4.07      5.38
## 12    3.7        10.3       0.263        0.359   32       7.4       8.6 
## 13    1.95        5.3       0.614        0.367   78.4     3.69      4.69
## 14    1.8         4.67      0.750        0.381   74.5     2.23      2.81
## 15    0.8         2.17      0.808        0.320   47.8     3.59      4.92
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1             0.956     2.83       5.59     1         2.89       1.73
##  2             4.8       3.1        8.6      2         0.8        3.4 
##  3             1.54      3.54       5.84     1.02      1.98       2.16
##  4             0.844     2.24       4.36     0.544     1.96       1.3 
##  5             5.4       0.95       4        1.52      0.55       3.32
##  6             1.23      2.1        4.68     0.7       1.39       1.55
##  7             2.2       2.43       5.14     1.03      0.786      2.44
##  8             1.51      1.21       3.24     0.95      0.544      1.32
##  9             2.36      1.30       4.33     1.22      0.58       2.05
## 10             1.73      2.67       5.72     0.817     2.22       1.93
## 11             2.94      1.33       4.29     1.38      0.633      2.47
## 12             8.7       0.4        3.5      1.7       0.3        5.2 
## 13             3.81      0.693      3.34     1.41      0.364      2.7 
## 14             3.28      0.808      3.08     1.15      0.423      1.76
## 15             1.59      2.31       5.01     1.15      1.18       2.12
##    pts_per_g_college
##                <dbl>
##  1             11.8 
##  2             19.2 
##  3             17.7 
##  4              8.36
##  5             19.1 
##  6             12.3 
##  7             17.9 
##  8             11.0 
##  9             14.4 
## 10             14.0 
## 11             16.6 
## 12             27.4 
## 13             16.9 
## 14             12.4 
## 15             14.7
# Median of every column within each cluster (default tibble printing).
df_cbb.sample |>
  group_by(group) |>
  summarize(across(.cols = everything(), .fns = median))
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     2.31          2.34     0.956    3.63          4.77   0.768
##  2     2     1.70          1.85     0.918    4.82          6.67   0.723
##  3     3     2.01          2.13     0.953    5.05          6.59   0.774
##  4     4     0.657         0.705    0.944    1.17          1.54   0.752
##  5     5     0.345         0.405    0.91     2.66          4.33   0.628
##  6     6     0.566         0.641    0.932    1.87          2.59   0.728
##  7     7     1.05          1.13     0.924    3.3           4.92   0.67 
##  8     8     0.285         0.299    0.907    1.09          1.53   0.691
##  9     9     0.453         0.453    0.925    1.62          2.45   0.667
## 10    10     1.51          1.61     0.951    3.12          4.13   0.708
## 11    11     0.447         0.457    0.916    2.22          3.71   0.640
## 12    12     0             0        0        3.28          6.28   0.522
## 13    13     0.108         0.108    0.966    1.04          1.70   0.594
## 14    14     0.136         0.167    0.875    0.807         1.35   0.623
## 15    15     0.806         0.885    0.938    2.43          3.58   0.68 
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# Look up one player's draft slot and career per-game line.
df_career_stats |>
  filter(player == "Jaylen Brown") |>
  select(
    player, pick_overall, year,
    pts_per_g, trb_per_g, ast_per_g, vorp, g
  )
## # A tibble: 1 × 8
##   player       pick_overall  year pts_per_g trb_per_g ast_per_g  vorp     g
##   <chr>               <dbl> <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
## 1 Jaylen Brown            3  2016      18.6       5.3       2.4   9.7   540
# Average career production by draft slot for players with `year >= 2010`.
# Only the reported columns are averaged: applying `mean()` to every column
# also hit the character columns, which produced one "argument is not numeric
# or logical" warning per group per column and NA results that were then
# discarded by `select()`. The resulting table is unchanged.
df_career_stats |>
  filter(year >= 2010) |>
  group_by(pick_overall) |>
  summarize(across(c(pts_per_g, trb_per_g, ast_per_g, vorp, g), mean))
## # A tibble: 61 × 6
##    pick_overall pts_per_g trb_per_g ast_per_g  vorp     g
##           <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
##  1            1     18.9       6.65      4.28 14.2   380.
##  2            2     14.7       5.14      3.19  3.91  338.
##  3            3     17.4       6.58      3.31 12.0   420.
##  4            4     12         5.32      1.91  3     372.
##  5            5     12.5       4.61      3.36  4.84  377.
##  6            6      9.92      4.5       2.17  6.57  346.
##  7            7     12.4       5.08      2.51  3.77  430.
##  8            8      9.21      3.31      1.91  1.71  390.
##  9            9     10.3       4.76      2.31  5.61  418.
## 10           10      9.74      3.46      2.13  5.14  370.
## # ℹ 51 more rows
# Clustering input built from `df_cbb_scaled`: attach the row names of
# `df_cbb` as a `name` column, then turn that column into the row names so
# the k-means assignment vectors are named.
df_cluster2 <- df_cbb_scaled
df_cluster2[["name"]] <- rownames(df_cbb)
df_cluster2 <- df_cluster2 |>
  relocate(name, .before = dunk_made) |>
  column_to_rownames(var = "name")

# Cluster counts to compare.
k1 <- 15
k2 <- 10
k3 <- 5

# One k-means fit per cluster count, 50 random starts each.
# NOTE(review): no seed is set in this chunk, so labels can change between
# runs unless a seed is set earlier in the file -- confirm.
k15 <- kmeans(x = df_cluster2, centers = k1, nstart = 50)
k10 <- kmeans(x = df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(x = df_cluster2, centers = k3, nstart = 50)

temp_assign <- k15[["cluster"]]

#' Add the cluster labels of a k-means fit to a table of existing labels
#'
#' Joins the assignments stored in `kmeans$cluster` onto `df` by `name` as a
#' new `all_cluster` column and places `pc_cluster` right after it.
#'
#' @param df Data frame with a `name` column (join key) and a `pc_cluster`
#'   column.
#' @param kmeans Fitted k-means object whose `cluster` element is a named
#'   vector; the names are matched against `df$name`.
#' @return `df` with an added `all_cluster` column, ordered so that
#'   `pc_cluster` immediately follows `all_cluster`. Rows of `df` without a
#'   matching name get `NA` in `all_cluster`.
combine <- function(df, kmeans) {
  assignments <- kmeans$cluster
  # Without names there is no key to join on; fail with a clear message
  # instead of an obscure join error.
  if (is.null(names(assignments))) {
    stop("`kmeans$cluster` must be a named vector.", call. = FALSE)
  }

  # Built with `tibble()` rather than the deprecated `as.tibble()`; the column
  # is created under its final name, so no conditional rename is needed.
  labels <- tibble(
    name = names(assignments),
    all_cluster = unname(assignments)
  )

  df |>
    left_join(labels, by = "name") |>
    relocate(pc_cluster, .after = all_cluster)
}

# Pair each set of `pc_cluster` labels with the labels of the matching
# k-means fit on `df_cluster2`; the 15-cluster table is additionally sorted
# by `pc_cluster`, then `all_cluster`.
groups15 <- arrange(
  combine(df = cluster_df15, kmeans = k15),
  pc_cluster, all_cluster
)
groups10 <- combine(df = cluster_df10, kmeans = k10)
groups5 <- combine(df = cluster_df5, kmeans = k5)

groups15
## # A tibble: 165 × 3
##    name           all_cluster pc_cluster
##    <chr>                <int>      <int>
##  1 Mo Bamba                 3          1
##  2 Anthony Davis            3          1
##  3 Bam Adebayo             11          1
##  4 Nerlens Noel            11          1
##  5 Jaxson Hayes            11          1
##  6 Derrick Favors          11          1
##  7 Jalen Duren             11          1
##  8 Andre Drummond          11          1
##  9 Joel Embiid             15          1
## 10 Ben Simmons              7          2
## # ℹ 155 more rows
# Join both sets of cluster labels onto the per-player stats by name.
# The row names are moved into a `name` column on a piped copy only, so
# `df_cbb` itself is never modified. Previously `df_cbb` was overwritten and
# then restored, which left it without row names whenever the join failed in
# between and made the chunk unsafe to re-run.
df_groups <- df_cbb |>
  rownames_to_column(var = "name") |>
  left_join(groups15, by = "name")
library(ggforce)
# Numeric 1/0 indicator columns: is the player in `bust_list` / `good_list`?
df_groups <- mutate(
  df_groups,
  bust = as.numeric(name %in% bust_list),
  good = as.numeric(name %in% good_list)
)


# Per-cluster profile: group size, mean of every numeric column, and the
# ratio of the mean `good` flag to the mean `bust` flag.
# Only numeric columns are averaged. Averaging every column also ran `mean()`
# on the character `name` column, which raised one warning per cluster and
# produced an NA column that then had to be dropped again.
# `n` is created before `across()`, so it is averaged too and stays a double,
# as before.
# NOTE(review): `ratio` is Inf or NaN for clusters with no busts -- confirm
# that this is acceptable downstream.
df_groups |>
  group_by(all_cluster) |>
  summarize(
    n = n(),
    across(where(is.numeric), mean),
    ratio = good / bust
  ) |>
  print(width = Inf)
## # A tibble: 15 × 32
##    all_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##          <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1           1    15    0.433         0.481     0.902     1.98         3.01
##  2           2     9    0.543         0.589     0.896     3.14         5.06
##  3           3     3    2.12          2.17      0.979     3.57         4.34
##  4           4    22    0.684         0.743     0.920     1.88         2.70
##  5           5     2    0             0         0         1.42         2.14
##  6           6     1    0             0         0         3.28         6.28
##  7           7     4    2.30          2.44      0.942     5.86         7.67
##  8           8    22    0.263         0.294     0.887     1.10         1.78
##  9           9    14    1.28          1.39      0.925     3.13         4.45
## 10          10     7    0.411         0.445     0.934     1.88         2.72
## 11          11    10    1.96          2.09      0.941     3.73         5.00
## 12          12    12    0.0783        0.0854    0.932     1.11         1.83
## 13          13    14    0.376         0.430     0.896     1.23         1.72
## 14          14    19    0.466         0.512     0.907     1.98         3.13
## 15          15    11    0.780         0.823     0.947     1.78         2.40
##    rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
##      <dbl>     <dbl>         <dbl>             <dbl>        <dbl>          <dbl>
##  1   0.663     0.314         0.574              1.92        0.290         0.176 
##  2   0.618     0.228         1.62               4.33        0.366         0.0983
##  3   0.825     0.556         0.964              2.55        0.391         0.281 
##  4   0.700     0.548         0.884              2.14        0.414         0.474 
##  5   0.664     0.062         0.788              1.82        0.438         0.024 
##  6   0.522     0.114         1.19               2.78        0.427         0.026 
##  7   0.763     0.529         1.58               3.84        0.428         0.402 
##  8   0.621     0.407         0.585              1.63        0.357         0.272 
##  9   0.707     0.497         1.65               4.31        0.382         0.409 
## 10   0.698     0.300         0.731              1.71        0.449         0.104 
## 11   0.745     0.621         1.06               2.90        0.366         0.456 
## 12   0.613     0.214         0.907              2.25        0.402         0.181 
## 13   0.719     0.538         0.733              1.69        0.438         0.313 
## 14   0.637     0.372         1.65               4.32        0.382         0.220 
## 15   0.750     0.681         0.685              1.80        0.357         0.606 
##    fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##      <dbl>     <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1   0.514    0.947       2.89      0.724        0.313   61.6     3.55      5.07
##  2   0.508    1.41        4         0.582        0.349   36.7     4.62      6.17
##  3   0.664    0.633       1.83      0.879        0.296   34       2.83      4.07
##  4   0.576    0.955       2.43      0.893        0.434   59.3     3.09      4.09
##  5   0.526    1.15        2.65      0.572        0.423   42       3.65      4.35
##  6   0.493    3.7        10.3       0.263        0.359   32       7.4       8.6 
##  7   0.648    0.425       1.28      0.905        0.252   33.5     4.47      6.75
##  8   0.496    1.83        4.94      0.780        0.369   65.6     2.28      2.87
##  9   0.550    0.629       1.8       0.940        0.308   38.9     3.47      5.12
## 10   0.584    1.53        4.03      0.717        0.370   41.9     1.86      2.57
## 11   0.610    0           0.02      0            0       36.7     2.9       4.79
## 12   0.488    1.98        5.3       0.602        0.371   83.5     3.82      4.78
## 13   0.570    0.857       2.33      0.924        0.356   82.9     2.04      2.77
## 14   0.489    1.82        4.98      0.777        0.359   40.2     3.83      4.91
## 15   0.589    0.0182      0.182     0.432        0.0341  64.6     1.97      3.15
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1             3.27      1.51       4.41     1.6       0.753      2.68
##  2             5.32      1.02       4.12     1.46      0.522      3.28
##  3             1.23      2.67       7.6      1         4.03       1.47
##  4             1.31      2.06       5.18     0.764     1.31       1.69
##  5             6.15      0.35       2.5      1.3       0.3        2.55
##  6             8.7       0.4        3.5      1.7       0.3        5.2 
##  7             2.5       3.5        7.32     1.38      1.35       2.52
##  8             1.91      0.982      3.33     1.07      0.409      1.66
##  9             1.84      2.78       5.43     1.04      1.54       2.16
## 10             4.57      0.914      3.54     1.8       0.571      2.26
## 11             1.04      3.02       4.88     1         2.27       1.9 
## 12             3.62      0.7        3.3      1.35      0.35       2.48
## 13             1.32      1.39       3.24     0.871     0.643      1.34
## 14             2.2       1.45       4.28     1.06      0.6        2.18
## 15             0.927     2.28       4.5      0.564     1.95       1.4 
##    pts_per_g_college pc_cluster   bust  good ratio
##                <dbl>      <dbl>  <dbl> <dbl> <dbl>
##  1             13.9       11.3  0      0.333 Inf  
##  2             18.8        6.56 0      0.444 Inf  
##  3             13.7        4    0      0.667 Inf  
##  4             13.9        8.77 0.136  0.409   3  
##  5             12.4        9.5  0.5    0.5     1  
##  6             27.4       12    0      1     Inf  
##  7             20.7        2.75 0      0.5   Inf  
##  8             12.9       10.7  0.182  0.273   1.5
##  9             15.4        9.21 0.143  0.357   2.5
## 10             12.6       10.7  0      0.429 Inf  
## 11             13.1        3    0.1    0.5     5  
## 12             17.4       13.1  0.0833 0.5     6  
## 13             10.7        7.36 0      0.286 Inf  
## 14             17.1        9.95 0.158  0.316   2  
## 15              9.08       4.09 0      0.455 Inf
# Assemble the plotting frame. The row names of `df_cluster` are promoted to a
# `name` column so they can act as the join key; the columns of `groups15` and
# the `bust` / `good` columns of `df_groups` are attached; then `name` is moved
# back into the row names.
df_pc <- df_cluster |>
  rownames_to_column("name") |>
  left_join(groups15, by = "name") |>
  left_join(select(df_groups, name, bust, good), by = "name") |>
  column_to_rownames("name") |>
  # Cluster ids are stored as factors so they are treated as discrete groups
  # (not a continuous scale) when mapped to colour and fill below.
  mutate(across(c(pc_cluster, all_cluster), as.factor))

# Convex hull of each `all_cluster` group in the PC1/PC2 plane. `chull()`
# returns the positions of the hull vertices within the group, and `slice()`
# keeps exactly those rows. The grouping is dropped afterwards so `hulls` does
# not stay a grouped tibble.
hulls <- df_pc |>
  group_by(all_cluster) |>
  slice(chull(PC1, PC2)) |>
  ungroup()

# Scatter plot of the observations on PC1/PC2, coloured by `all_cluster`, with
# a translucent hull polygon per cluster.
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = all_cluster)) +
  geom_polygon(
    data = hulls,
    aes(group = all_cluster, color = all_cluster, fill = all_cluster),
    alpha = 0.2
  ) +
  geom_point() +
  # Black overlay markers: shape 10 for rows with bust == 1 and shape 5 for
  # rows with good == 1. They are kept out of the legend.
  geom_point(
    data = df_pc |> filter(bust == 1),
    size = 3, color = "black", shape = 10, show.legend = FALSE
  ) +
  geom_point(
    data = df_pc |> filter(good == 1),
    size = 3, color = "black", shape = 5, show.legend = FALSE
  ) +
  # The x axis is drawn reversed (largest PC1 on the left).
  scale_x_reverse() +
  labs(
    title = "Clusters using higher dimensional data",
    x = "Dimension 1",
    y = "Dimension 2",
    color = "Cluster",
    fill = "Cluster"
  )

p

# Labelled variant of `p`: adds a repelled text label for every row with
# bust == 1 or good == 1. The row names are restored to a `name` column
# because that column supplies the label text.
p2 <- p +
  geom_label_repel(
    mapping = aes(label = name),
    data = df_pc |>
      rownames_to_column(var = "name") |>
      filter(bust == 1 | good == 1),
    size = 1.6,
    segment.size = 0.2,
    max.overlaps = 20,
    # NA fill and NA label.size: presumably so the label box has no background
    # or border and only the text shows.
    fill = NA,
    label.size = NA
  )
p2

# Per-`pc_cluster` summary: group size, the mean of every remaining column, and
# the ratio of mean `good` to mean `bust`. A zero mean `bust` makes the ratio
# Inf (or NaN when mean `good` is also 0).
#
# `name` is excluded inside `across()` rather than dropped afterwards. Taking
# its mean only produced an "argument is not numeric or logical" warning per
# group and an all-NA column that then had to be removed.
df_groups |>
  group_by(pc_cluster) |>
  summarize(
    n = n(),
    across(!name, mean),
    ratio = good / bust
  ) |>
  print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `pc_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
##    pc_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##         <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1          1     9     2.09          2.20     0.955    3.54          4.56
##  2          2     1     1.70          1.85     0.918    4.82          6.67
##  3          3     8     2.06          2.18     0.946    4.94          6.46
##  4          4     9     0.745         0.791    0.943    1.45          1.98
##  5          5     8     0.387         0.428    0.770    2.70          4.35
##  6          6    16     0.660         0.716    0.915    1.94          2.65
##  7          7     7     1.17          1.28     0.915    3.34          5.06
##  8          8    18     0.289         0.323    0.916    1.10          1.63
##  9          9    25     0.435         0.480    0.916    1.63          2.44
## 10         10     6     1.43          1.55     0.930    3.03          4.19
## 11         11    18     0.468         0.514    0.912    2.30          3.65
## 12         12     1     0             0        0        3.28          6.28
## 13         13    14     0.108         0.125    0.899    1.18          1.98
## 14         14    13     0.137         0.160    0.802    0.865         1.40
## 15         15    12     0.823         0.890    0.929    2.45          3.62
##    rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
##      <dbl>     <dbl>         <dbl>             <dbl>        <dbl>          <dbl>
##  1   0.781     0.647         0.922              2.70        0.344          0.498
##  2   0.723     0.484         1.67               4.88        0.342          0.309
##  3   0.764     0.546         1.62               3.88        0.424          0.376
##  4   0.747     0.698         0.608              1.57        0.357          0.626
##  5   0.624     0.221         1.52               4.03        0.37           0.083
##  6   0.735     0.583         0.884              2.10        0.423          0.474
##  7   0.662     0.440         1.90               5.17        0.365          0.291
##  8   0.683     0.492         0.647              1.58        0.401          0.346
##  9   0.671     0.404         0.909              2.41        0.375          0.262
## 10   0.725     0.543         1.47               3.73        0.401          0.463
## 11   0.633     0.309         1.31               3.56        0.362          0.161
## 12   0.522     0.114         1.19               2.78        0.427          0.026
## 13   0.602     0.238         0.887              2.31        0.380          0.186
## 14   0.623     0.279         0.573              1.43        0.397          0.136
## 15   0.678     0.478         0.875              2.38        0.357          0.359
##    fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##      <dbl>     <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1   0.618    0.0667      0.278     0.317        0.0549  32.3     2.67      4.22
##  2   0.561    0           0.1       1            0       33       6         9   
##  3   0.63     0.262       0.812     0.578        0.168   34.2     3.76      6.05
##  4   0.580    0.0444      0.211     0.444        0.0648  76.2     1.66      2.72
##  5   0.509    1.75        4.56      0.479        0.379   33.1     4.84      6.25
##  6   0.595    0.5         1.44      0.847        0.340   60.4     2.72      3.8 
##  7   0.511    1.1         3.21      0.891        0.331   36.3     4.13      5.86
##  8   0.548    1.17        3.12      0.876        0.405   80.9     1.91      2.54
##  9   0.525    1.54        4.21      0.784        0.362   53.4     2.87      3.72
## 10   0.583    0.667       1.82      0.921        0.356   35.3     2.8       4.28
## 11   0.501    1.52        4.29      0.731        0.342   41.6     4.07      5.38
## 12   0.493    3.7        10.3       0.263        0.359   32       7.4       8.6 
## 13   0.476    1.95        5.3       0.614        0.367   78.4     3.69      4.69
## 14   0.500    1.8         4.67      0.750        0.381   74.5     2.23      2.81
## 15   0.564    0.8         2.17      0.808        0.320   47.8     3.59      4.92
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1             0.956     2.83       5.59     1         2.89       1.73
##  2             4.8       3.1        8.6      2         0.8        3.4 
##  3             1.54      3.54       5.84     1.02      1.98       2.16
##  4             0.844     2.24       4.36     0.544     1.96       1.3 
##  5             5.4       0.95       4        1.52      0.55       3.32
##  6             1.23      2.1        4.68     0.7       1.39       1.55
##  7             2.2       2.43       5.14     1.03      0.786      2.44
##  8             1.51      1.21       3.24     0.95      0.544      1.32
##  9             2.36      1.30       4.33     1.22      0.58       2.05
## 10             1.73      2.67       5.72     0.817     2.22       1.93
## 11             2.94      1.33       4.29     1.38      0.633      2.47
## 12             8.7       0.4        3.5      1.7       0.3        5.2 
## 13             3.81      0.693      3.34     1.41      0.364      2.7 
## 14             3.28      0.808      3.08     1.15      0.423      1.76
## 15             1.59      2.31       5.01     1.15      1.18       2.12
##    pts_per_g_college all_cluster   bust  good ratio
##                <dbl>       <dbl>  <dbl> <dbl> <dbl>
##  1             11.8         9.67 0      0.667 Inf  
##  2             19.2         7    0      1     Inf  
##  3             17.7         9    0.125  0.5     4  
##  4              8.36       14.8  0      0.333 Inf  
##  5             19.1         3.88 0      0.5   Inf  
##  6             12.3         7.06 0      0.5   Inf  
##  7             17.9         9.43 0      0.143 Inf  
##  8             11.0        10.1  0.0556 0.278   5  
##  9             14.4         7.16 0.2    0.36    1.8
## 10             14.0         8    0.333  0.333   1  
## 11             16.6         9.56 0.0556 0.444   8  
## 12             27.4         6    0      1     Inf  
## 13             16.9        11.1  0.0714 0.429   6  
## 14             12.4         7.85 0.154  0.308   2  
## 15             14.7         5.08 0.167  0.167   1
# Convex hull of each `pc_cluster` group in the PC1/PC2 plane. `chull()`
# returns the positions of the hull vertices within the group, and `slice()`
# keeps exactly those rows. The grouping is dropped afterwards so `hulls` does
# not stay a grouped tibble.
hulls <- df_pc |>
  group_by(pc_cluster) |>
  slice(chull(PC1, PC2)) |>
  ungroup()

# Scatter plot of the observations on PC1/PC2, coloured by `pc_cluster`, with
# a translucent hull polygon per cluster.
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) +
  geom_polygon(
    data = hulls,
    aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster),
    alpha = 0.2
  ) +
  geom_point() +
  # Black overlay markers: shape 10 for rows with bust == 1 and shape 5 for
  # rows with good == 1. They are kept out of the legend.
  geom_point(
    data = df_pc |> filter(bust == 1),
    size = 3, color = "black", shape = 10, show.legend = FALSE
  ) +
  geom_point(
    data = df_pc |> filter(good == 1),
    size = 3, color = "black", shape = 5, show.legend = FALSE
  ) +
  # The x axis is drawn reversed (largest PC1 on the left).
  scale_x_reverse() +
  labs(
    # NOTE(review): this title used to repeat the one on the `all_cluster`
    # figure ("Clusters using higher dimensional data"), so the two plots were
    # indistinguishable. Confirm the wording matches how `pc_cluster` was fit.
    title = "Clusters using principal components",
    x = "Dimension 1",
    y = "Dimension 2",
    color = "Cluster",
    fill = "Cluster"
  )

p

# Labelled variant of the `pc_cluster` plot: adds a repelled text label for
# every row with bust == 1 or good == 1. The row names are restored to a
# `name` column because that column supplies the label text.
p2 <- p +
  geom_label_repel(
    mapping = aes(label = name),
    data = df_pc |>
      rownames_to_column(var = "name") |>
      filter(bust == 1 | good == 1),
    size = 1.6,
    segment.size = 0.2,
    max.overlaps = 20,
    # NA fill and NA label.size: presumably so the label box has no background
    # or border and only the text shows.
    fill = NA,
    label.size = NA
  )

p2